import pandas as pd
import matplotlib.pyplot as plt
! pip install seaborn
Collecting seaborn
Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)
|████████████████████████████████| 292 kB 17.2 MB/s eta 0:00:01
Requirement already satisfied: numpy>=1.15 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from seaborn) (1.19.5)
Requirement already satisfied: scipy>=1.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from seaborn) (1.5.3)
Requirement already satisfied: pandas>=0.23 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from seaborn) (1.1.5)
Requirement already satisfied: matplotlib>=2.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from seaborn) (3.3.4)
Requirement already satisfied: kiwisolver>=1.0.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (1.3.1)
Requirement already satisfied: python-dateutil>=2.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)
Requirement already satisfied: cycler>=0.10 in /srv/conda/envs/notebook/lib/python3.6/site-packages/cycler-0.10.0-py3.6.egg (from matplotlib>=2.2->seaborn) (0.10.0)
Requirement already satisfied: pillow>=6.2.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (8.3.1)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)
Requirement already satisfied: six in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.16.0)
Requirement already satisfied: pytz>=2017.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from pandas>=0.23->seaborn) (2021.1)
Installing collected packages: seaborn
Successfully installed seaborn-0.11.2
import seaborn as sns
heart_stroke = pd.read_csv("healthcare-dataset-stroke-.csv.csv")
heart_stroke.head(10)
| id | gender | age | hypertension | heart_disease | ever_married | work_type | Residence_type | avg_glucose_level | bmi | smoking_status | stroke | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9046 | Male | 67.0 | 0 | 1 | Yes | Private | Urban | 228.69 | 36.6 | formerly smoked | 1 |
| 1 | 51676 | Female | 61.0 | 0 | 0 | Yes | Self-employed | Rural | 202.21 | NaN | never smoked | 1 |
| 2 | 31112 | Male | 80.0 | 0 | 1 | Yes | Private | Rural | 105.92 | 32.5 | never smoked | 1 |
| 3 | 60182 | Female | 49.0 | 0 | 0 | Yes | Private | Urban | 171.23 | 34.4 | smokes | 1 |
| 4 | 1665 | Female | 79.0 | 1 | 0 | Yes | Self-employed | Rural | 174.12 | 24.0 | never smoked | 1 |
| 5 | 56669 | Male | 81.0 | 0 | 0 | Yes | Private | Urban | 186.21 | 29.0 | formerly smoked | 1 |
| 6 | 53882 | Male | 74.0 | 1 | 1 | Yes | Private | Rural | 70.09 | 27.4 | never smoked | 1 |
| 7 | 10434 | Female | 69.0 | 0 | 0 | No | Private | Urban | 94.39 | 22.8 | never smoked | 1 |
| 8 | 27419 | Female | 59.0 | 0 | 0 | Yes | Private | Rural | 76.15 | NaN | Unknown | 1 |
| 9 | 60491 | Female | 78.0 | 0 | 0 | Yes | Private | Urban | 58.57 | 24.2 | Unknown | 1 |
heart_stroke.isna().sum()
id 0 gender 0 age 0 hypertension 0 heart_disease 0 ever_married 0 work_type 0 Residence_type 0 avg_glucose_level 0 bmi 201 smoking_status 0 stroke 0 dtype: int64
heart_stroke.fillna(heart_stroke['bmi'].mean(), inplace=True)
heart_stroke.head(10)
| id | gender | age | hypertension | heart_disease | ever_married | work_type | Residence_type | avg_glucose_level | bmi | smoking_status | stroke | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9046 | Male | 67.0 | 0 | 1 | Yes | Private | Urban | 228.69 | 36.600000 | formerly smoked | 1 |
| 1 | 51676 | Female | 61.0 | 0 | 0 | Yes | Self-employed | Rural | 202.21 | 28.893237 | never smoked | 1 |
| 2 | 31112 | Male | 80.0 | 0 | 1 | Yes | Private | Rural | 105.92 | 32.500000 | never smoked | 1 |
| 3 | 60182 | Female | 49.0 | 0 | 0 | Yes | Private | Urban | 171.23 | 34.400000 | smokes | 1 |
| 4 | 1665 | Female | 79.0 | 1 | 0 | Yes | Self-employed | Rural | 174.12 | 24.000000 | never smoked | 1 |
| 5 | 56669 | Male | 81.0 | 0 | 0 | Yes | Private | Urban | 186.21 | 29.000000 | formerly smoked | 1 |
| 6 | 53882 | Male | 74.0 | 1 | 1 | Yes | Private | Rural | 70.09 | 27.400000 | never smoked | 1 |
| 7 | 10434 | Female | 69.0 | 0 | 0 | No | Private | Urban | 94.39 | 22.800000 | never smoked | 1 |
| 8 | 27419 | Female | 59.0 | 0 | 0 | Yes | Private | Rural | 76.15 | 28.893237 | Unknown | 1 |
| 9 | 60491 | Female | 78.0 | 0 | 0 | Yes | Private | Urban | 58.57 | 24.200000 | Unknown | 1 |
heart_stroke.isna().sum()
id 0 gender 0 age 0 hypertension 0 heart_disease 0 ever_married 0 work_type 0 Residence_type 0 avg_glucose_level 0 bmi 0 smoking_status 0 stroke 0 dtype: int64
s_active = heart_stroke[heart_stroke['stroke'] == 1]
s_inactive = heart_stroke[heart_stroke['stroke'] == 0]
active_age = pd.DataFrame(s_active["age"])
inactive_age = pd.DataFrame(s_inactive["age"])
active_age.head()
| age | |
|---|---|
| 0 | 67.0 |
| 1 | 61.0 |
| 2 | 80.0 |
| 3 | 49.0 |
| 4 | 79.0 |
fig, ax = plt.subplots(figsize=(10, 6))
heart_stroke['age'] = heart_stroke['age'].astype(int)
rate = []
for i in range(heart_stroke['age'].min(), heart_stroke['age'].max()):
rate.append(heart_stroke[heart_stroke['age'] < i]['stroke'].sum() / len(heart_stroke[heart_stroke['age'] < i]['stroke']))
sns.lineplot(data=rate ,ax=ax);
ax.set(xlabel = 'Age', ylabel='Rate of getting Heart stroke')
/srv/conda/envs/notebook/lib/python3.6/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in long_scalars import sys
[Text(0.5, 0, 'Age'), Text(0, 0.5, 'Rate of getting Heart stroke')]
# KERNEL DISTRIBUTION PLOT
# giveS the probability density
fig, ax = plt.subplots(nrows=2,
ncols=1,
figsize=(8, 10))
ax[0].set(title='Density plot of Age having heart stroke')
sns.kdeplot(active_age["age"], label="Active", ax=ax[0])
ax[1].set(title='Density plot of Age not having heart stroke')
sns.kdeplot(inactive_age["age"],label="Inactive", ax=ax[1], color="red");
# active_smoke is smoking column hacving heart stroke = 1
# inactive_smoke is smoking column hacving heart stroke = 0
active_smoke = pd.DataFrame(s_active["smoking_status"].value_counts())
active_smoke["Percentage"] = active_smoke["smoking_status"].apply(lambda x: x/sum(active_smoke["smoking_status"])*100)
active_smoke
| smoking_status | Percentage | |
|---|---|---|
| never smoked | 90 | 36.144578 |
| formerly smoked | 70 | 28.112450 |
| Unknown | 47 | 18.875502 |
| smokes | 42 | 16.867470 |
inactive_smoke = pd.DataFrame(s_inactive['smoking_status'].value_counts())
inactive_smoke["Percentage"] = inactive_smoke["smoking_status"].apply(lambda x: x/sum(inactive_smoke["smoking_status"])*100)
inactive_smoke
| smoking_status | Percentage | |
|---|---|---|
| never smoked | 1802 | 37.070562 |
| Unknown | 1497 | 30.796132 |
| formerly smoked | 815 | 16.766098 |
| smokes | 747 | 15.367208 |
fig, ax = plt.subplots(nrows=2,
ncols=1,
figsize=(8, 10))
ax[0].barh(active_smoke.index, active_smoke['Percentage'], height=0.7)
ax[1].barh(inactive_smoke.index, inactive_smoke['Percentage'], height=0.7, color='red');
! pip install plotly
! pip install cufflinks
import plotly as ps
import cufflinks as cf
Collecting plotly
Downloading plotly-5.3.1-py2.py3-none-any.whl (23.9 MB)
|████████████████████████████████| 23.9 MB 20.2 MB/s eta 0:00:01
Collecting tenacity>=6.2.0
Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Requirement already satisfied: six in /srv/conda/envs/notebook/lib/python3.6/site-packages (from plotly) (1.16.0)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.3.1 tenacity-8.0.1
Collecting cufflinks
Downloading cufflinks-0.17.3.tar.gz (81 kB)
|████████████████████████████████| 81 kB 6.8 MB/s eta 0:00:01
Requirement already satisfied: numpy>=1.9.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cufflinks) (1.19.5)
Requirement already satisfied: pandas>=0.19.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cufflinks) (1.1.5)
Requirement already satisfied: plotly>=4.1.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cufflinks) (5.3.1)
Requirement already satisfied: six>=1.9.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cufflinks) (1.16.0)
Collecting colorlover>=0.2.1
Downloading colorlover-0.3.0-py3-none-any.whl (8.9 kB)
Requirement already satisfied: setuptools>=34.4.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cufflinks) (49.6.0.post20210108)
Requirement already satisfied: ipython>=5.3.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cufflinks) (7.16.1)
Requirement already satisfied: ipywidgets>=7.0.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cufflinks) (7.6.3)
Requirement already satisfied: pygments in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipython>=5.3.0->cufflinks) (2.9.0)
Requirement already satisfied: pickleshare in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipython>=5.3.0->cufflinks) (0.7.5)
Requirement already satisfied: jedi>=0.10 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipython>=5.3.0->cufflinks) (0.17.2)
Requirement already satisfied: decorator in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipython>=5.3.0->cufflinks) (4.4.2)
Requirement already satisfied: traitlets>=4.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipython>=5.3.0->cufflinks) (4.3.3)
Requirement already satisfied: pexpect in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipython>=5.3.0->cufflinks) (4.8.0)
Requirement already satisfied: backcall in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipython>=5.3.0->cufflinks) (0.2.0)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipython>=5.3.0->cufflinks) (3.0.19)
Requirement already satisfied: ipykernel>=4.5.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipywidgets>=7.0.0->cufflinks) (5.5.5)
Requirement already satisfied: widgetsnbextension~=3.5.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipywidgets>=7.0.0->cufflinks) (3.5.1)
Requirement already satisfied: nbformat>=4.2.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipywidgets>=7.0.0->cufflinks) (5.1.3)
Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipywidgets>=7.0.0->cufflinks) (1.0.0)
Requirement already satisfied: jupyter-client in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (6.1.12)
Requirement already satisfied: tornado>=4.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (6.1)
Requirement already satisfied: parso<0.8.0,>=0.7.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from jedi>=0.10->ipython>=5.3.0->cufflinks) (0.7.1)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (3.2.0)
Requirement already satisfied: ipython-genutils in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (0.2.0)
Requirement already satisfied: jupyter-core in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (4.7.1)
Requirement already satisfied: pyrsistent>=0.14.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (0.17.3)
Requirement already satisfied: importlib-metadata in /srv/conda/envs/notebook/lib/python3.6/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (4.6.3)
Requirement already satisfied: attrs>=17.4.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (21.2.0)
Requirement already satisfied: python-dateutil>=2.7.3 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from pandas>=0.19.2->cufflinks) (2.8.2)
Requirement already satisfied: pytz>=2017.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from pandas>=0.19.2->cufflinks) (2021.1)
Requirement already satisfied: tenacity>=6.2.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from plotly>=4.1.1->cufflinks) (8.0.1)
Requirement already satisfied: wcwidth in /srv/conda/envs/notebook/lib/python3.6/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->cufflinks) (0.2.5)
Requirement already satisfied: notebook>=4.4.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.3.0)
Requirement already satisfied: nbconvert in /srv/conda/envs/notebook/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.0.7)
Requirement already satisfied: jinja2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (3.0.1)
Requirement already satisfied: prometheus-client in /srv/conda/envs/notebook/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.11.0)
Requirement already satisfied: pyzmq>=17 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (22.1.0)
Requirement already satisfied: argon2-cffi in /srv/conda/envs/notebook/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (20.1.0)
Requirement already satisfied: Send2Trash>=1.5.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.7.1)
Requirement already satisfied: terminado>=0.8.3 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.10.1)
Requirement already satisfied: ptyprocess in /srv/conda/envs/notebook/lib/python3.6/site-packages (from terminado>=0.8.3->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.7.0)
Requirement already satisfied: cffi>=1.0.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.14.6)
Requirement already satisfied: pycparser in /srv/conda/envs/notebook/lib/python3.6/site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.20)
Requirement already satisfied: typing-extensions>=3.6.4 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from importlib-metadata->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (3.10.0.0)
Requirement already satisfied: zipp>=0.5 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from importlib-metadata->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (3.5.0)
Requirement already satisfied: MarkupSafe>=2.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.0.1)
Requirement already satisfied: pandocfilters>=1.4.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.4.2)
Requirement already satisfied: entrypoints>=0.2.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.3)
Requirement already satisfied: mistune<2,>=0.8.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.8.4)
Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.3)
Requirement already satisfied: jupyterlab-pygments in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.1.2)
Requirement already satisfied: bleach in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (3.3.1)
Requirement already satisfied: testpath in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.0)
Requirement already satisfied: defusedxml in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.7.1)
Requirement already satisfied: async-generator in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.10)
Requirement already satisfied: nest-asyncio in /srv/conda/envs/notebook/lib/python3.6/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.5.1)
Requirement already satisfied: webencodings in /srv/conda/envs/notebook/lib/python3.6/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.1)
Requirement already satisfied: packaging in /srv/conda/envs/notebook/lib/python3.6/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (21.0)
Requirement already satisfied: pyparsing>=2.0.2 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from packaging->bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.4.7)
Building wheels for collected packages: cufflinks
Building wheel for cufflinks (setup.py) ... done
Created wheel for cufflinks: filename=cufflinks-0.17.3-py3-none-any.whl size=67922 sha256=b91331c4007993be769b6e5e7231483ec1fdcc28244d30b32240b43105ad49e8
Stored in directory: /home/jovyan/.cache/pip/wheels/1c/db/ce/70cf35eb5a61b9ea3d34434072a8821dddc4f21eb5127e5415
Successfully built cufflinks
Installing collected packages: colorlover, cufflinks
Successfully installed colorlover-0.3.0 cufflinks-0.17.3
import plotly.express as px
px.pie(heart_stroke, values='stroke', names="gender")
px.pie(heart_stroke, values='stroke', names="smoking_status")
px.pie(heart_stroke, values='stroke', names="ever_married")
px.pie(heart_stroke, values='stroke', names="work_type")
from sklearn.model_selection import train_test_split
x = heart_stroke.drop('stroke', axis=1)
y = heart_stroke['stroke']
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
heart_stroke.head(10)
| id | gender | age | hypertension | heart_disease | ever_married | work_type | Residence_type | avg_glucose_level | bmi | smoking_status | stroke | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9046 | Male | 67 | 0 | 1 | Yes | Private | Urban | 228.69 | 36.600000 | formerly smoked | 1 |
| 1 | 51676 | Female | 61 | 0 | 0 | Yes | Self-employed | Rural | 202.21 | 28.893237 | never smoked | 1 |
| 2 | 31112 | Male | 80 | 0 | 1 | Yes | Private | Rural | 105.92 | 32.500000 | never smoked | 1 |
| 3 | 60182 | Female | 49 | 0 | 0 | Yes | Private | Urban | 171.23 | 34.400000 | smokes | 1 |
| 4 | 1665 | Female | 79 | 1 | 0 | Yes | Self-employed | Rural | 174.12 | 24.000000 | never smoked | 1 |
| 5 | 56669 | Male | 81 | 0 | 0 | Yes | Private | Urban | 186.21 | 29.000000 | formerly smoked | 1 |
| 6 | 53882 | Male | 74 | 1 | 1 | Yes | Private | Rural | 70.09 | 27.400000 | never smoked | 1 |
| 7 | 10434 | Female | 69 | 0 | 0 | No | Private | Urban | 94.39 | 22.800000 | never smoked | 1 |
| 8 | 27419 | Female | 59 | 0 | 0 | Yes | Private | Rural | 76.15 | 28.893237 | Unknown | 1 |
| 9 | 60491 | Female | 78 | 0 | 0 | Yes | Private | Urban | 58.57 | 24.200000 | Unknown | 1 |
encoder = OneHotEncoder()
categorized_features = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
tr = ColumnTransformer([('encoder', encoder, categorized_features)])
t_x = tr.fit_transform(x)
t_x
array([[0., 1., 0., ..., 1., 0., 0.],
[1., 0., 0., ..., 0., 1., 0.],
[0., 1., 0., ..., 0., 1., 0.],
...,
[1., 0., 0., ..., 0., 1., 0.],
[0., 1., 0., ..., 1., 0., 0.],
[1., 0., 0., ..., 0., 0., 0.]])
x_train, x_test, y_train, y_test = train_test_split(t_x , y, test_size=0.2)
from sklearn.ensemble import RandomForestClassifier
mod = RandomForestClassifier()
mod.fit(x_train, y_train)
mod.score(x_test, y_test)
0.9549902152641878
a = y_test
pd.DataFrame(a).head(10)
| stroke | |
|---|---|
| 756 | 0 |
| 3465 | 0 |
| 636 | 0 |
| 889 | 0 |
| 3764 | 0 |
| 1795 | 0 |
| 4716 | 0 |
| 4552 | 0 |
| 3967 | 0 |
| 1680 | 0 |
b = mod.predict(x_test)
pd.DataFrame(b).head(10)
| 0 | |
|---|---|
| 0 | 0 |
| 1 | 0 |
| 2 | 0 |
| 3 | 0 |
| 4 | 0 |
| 5 | 0 |
| 6 | 0 |
| 7 | 0 |
| 8 | 0 |
| 9 | 0 |
import numpy as np
arr1 = np.array(a.all())
arr2 = np.array(b.all())
if arr1==arr2:
print('Model is Working !')
Model is Working !
heart_stroke['gender'] = heart_stroke['gender'].replace({'Male':0,'Female':1,'Other':-1}).astype(np.uint8)
heart_stroke['Residence_type'] = heart_stroke['Residence_type'].replace({'Rural':0,'Urban':1}).astype(np.uint8)
heart_stroke['work_type'] = heart_stroke['work_type'].replace({'Private':0,'Self-employed':1,'Govt_job':2,'children':-1,'Never_worked':-2}).astype(np.uint8)
X = heart_stroke[['gender','age','hypertension','heart_disease','work_type','avg_glucose_level','bmi']]
y = heart_stroke['stroke']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.3, random_state=42)
X_test.head(3)
| gender | age | hypertension | heart_disease | work_type | avg_glucose_level | bmi | |
|---|---|---|---|---|---|---|---|
| 4688 | 0 | 31 | 0 | 0 | 1 | 64.85 | 23.0 |
| 4478 | 0 | 40 | 0 | 0 | 1 | 65.29 | 28.3 |
| 3849 | 1 | 8 | 0 | 0 | 255 | 74.42 | 22.5 |
! pip install imbalanced-learn
Collecting imbalanced-learn
Downloading imbalanced_learn-0.8.1-py3-none-any.whl (189 kB)
|████████████████████████████████| 189 kB 13.1 MB/s eta 0:00:01
Requirement already satisfied: joblib>=0.11 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from imbalanced-learn) (1.0.1)
Requirement already satisfied: scikit-learn>=0.24 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from imbalanced-learn) (0.24.2)
Requirement already satisfied: scipy>=0.19.1 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from imbalanced-learn) (1.5.3)
Requirement already satisfied: numpy>=1.13.3 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from imbalanced-learn) (1.19.5)
Requirement already satisfied: threadpoolctl>=2.0.0 in /srv/conda/envs/notebook/lib/python3.6/site-packages (from scikit-learn>=0.24->imbalanced-learn) (2.2.0)
Installing collected packages: imbalanced-learn
Successfully installed imbalanced-learn-0.8.1
import imblearn
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
#Importing SMOTE
from imblearn.over_sampling import SMOTE
sample = SMOTE()
X_train_1, y_train_1 =sample.fit_resample(X_train, y_train.ravel())
logreg_pipeline = Pipeline(steps = [('scale',StandardScaler()),('LR',LogisticRegression(random_state=42))])
logreg_cv = cross_val_score(logreg_pipeline,X_train_1,y_train_1,cv=10,scoring='f1')
print('Logistic Regression mean :',cross_val_score(logreg_pipeline,X_train_1,y_train_1,cv=10,scoring='f1').mean())
Logistic Regression mean : 0.8249824460155948
penalty = ['l1','l2']
C = [0.001, 0.01, 0.1, 1, 10, 100]
log_param_grid = {'penalty': penalty,
'C': C}
logreg = LogisticRegression()
grid = GridSearchCV(logreg,log_param_grid)
logreg_pipeline = Pipeline(steps = [('scale',StandardScaler()),('LR',LogisticRegression(C=0.1,penalty='l2',random_state=42))])
logreg_pipeline.fit(X_train_1,y_train_1)
#logreg.fit(X_train_1,y_train_1)
logreg_tuned_pred = logreg_pipeline.predict(X_test)
from sklearn.metrics import plot_confusion_matrix, classification_report,accuracy_score,f1_score
print(classification_report(y_test,logreg_tuned_pred))
print('Accuracy Score: ',accuracy_score(y_test,logreg_tuned_pred))
print('F1 Score: ',f1_score(y_test,logreg_tuned_pred))
precision recall f1-score support
0 0.97 0.76 0.85 3404
1 0.11 0.61 0.19 173
accuracy 0.75 3577
macro avg 0.54 0.68 0.52 3577
weighted avg 0.93 0.75 0.82 3577
Accuracy Score: 0.7528655297735533
F1 Score: 0.1919561243144424